""""""

"""
import urllib.request
url = 'https://ac.qq.com/ComicView/index/id/623537/cid/3?fromPrev=1'    #修真聊天群第二话
data = urllib.request.urlopen(url).read().decode('utf-8','ignore')
print(len(data))
filepath = 'C:\\Users\\DELL\\Desktop\\python操作文件夹\\腾讯动漫\\网页.html'
fh = open(filepath,"w",encoding='utf-8')
fh.write(data)
fh.close()

Finding: the data is loaded asynchronously, so Selenium with a headless browser is used instead.
"""

from selenium import webdriver
from time import sleep
#from selenium.webdriver.common.desired_capabilities import DesiredCapabilities  #这个模块可以伪装成别的浏览器
import re
import urllib.request

"""使用无界面模式"""
# Build the Chrome configuration for this scraping session.
options = webdriver.ChromeOptions()

# Switches handed to Chrome, applied in the order listed:
#   --headless      run without a visible browser window; without it a
#                   browser window pops up (screenshots work either way)
#   --disable-gpu   disable the graphics card
#   lang=...        use Chinese (zh_CN, UTF-8)
#   user-agent=...  replace the default User-Agent header
# A window-size switch exists only as a disabled line:
# options.add_argument('window-size=1200x600')
# NOTE(review): the double quotes inside the user-agent value are part of the
# argument text itself -- confirm the browser does not report them as part of
# the User-Agent.
chrome_flags = (
    '--headless',
    '--disable-gpu',
    'lang=zh_CN.UTF-8',
    'user-agent="Mozilla/5.0 (iPod; U; CPU iPhone OS 2_1 like Mac OS X; ja-jp) AppleWebKit/525.18.1 (KHTML, like Gecko) Version/3.1.1 Mobile/5F137 Safari/525.20"',
)
for chrome_flag in chrome_flags:
    options.add_argument(chrome_flag)

# Location of the chromedriver executable on this machine.
chromedriver_path = r'E:\chromedriver_win32 存放目录\chromedriver.exe'

# The configuration is passed as ``options=`` rather than ``chrome_options=``,
# because ``options`` is the keyword that is replacing ``chrome_options``.
# NOTE(review): the driver path is passed positionally; presumably newer
# Selenium releases expect it through a Service object -- confirm against the
# installed Selenium version.
browser = webdriver.Chrome(chromedriver_path, options=options)

# Reader page to scrape: first chapter (cid/2) of comic 623537.
url = "https://ac.qq.com/ComicView/index/id/623537/cid/2?fromPrev=1"

# Folder that receives every artifact of this run (screenshot, HTML dumps,
# downloaded images). The trailing separator lets file names be appended.
output_dir = "C:\\Users\\DELL\\Desktop\\python操作文件夹\\腾讯动漫\\"

# Vertical distance, in pixels, covered by each simulated scroll step.
scroll_step = 1920

# Everything that uses the browser runs inside try/finally so the headless
# Chrome process is shut down even when a step raises.
try:
    browser.get(url)
    # Fixed pause before inspecting the page, giving it time to finish loading.
    sleep(4)
    browser.get_screenshot_as_file(output_dir + "outline.png")

    print(browser.current_url)

    # Snapshot of the page source before any scrolling.
    data1 = browser.page_source
    with open(output_dir + "第一话（无循环）.html", "w", encoding="utf-8") as fh:
        fh.write(data1)

    # Simulate a reader scrolling down the page, one step per second.
    # window.scrollTo takes (x, y): x stays 0 so only the vertical position
    # changes, and y advances by one step on every iteration.
    for i in range(8):
        js = 'window.scrollTo(0,' + str((i + 1) * scroll_step) + ')'
        browser.execute_script(js)
        sleep(1)

    # Snapshot of the page source after scrolling; the image URLs are
    # extracted from this version.
    data2 = browser.page_source
    with open(output_dir + "第一话（有循环）.html", "w", encoding="utf-8") as fh:
        fh.write(data2)

    # Capture the variable middle part of every comic image URL found in the
    # data-src attribute of <img class="comic-pic"> tags.
    pat = '<img class="comic-pic" data-src="https:..manhua.qpic.cn.manhua_detail(.*?).jpg.800'
    all_id = re.compile(pat).findall(data2)

    # Rebuild each full image URL and save it as <index>.jpg in page order.
    for j, this_id in enumerate(all_id):
        this_url = "https://manhua.qpic.cn/manhua_detail" + this_id + ".jpg/800"
        filepath = output_dir + str(j) + ".jpg"
        urllib.request.urlretrieve(this_url, filename=filepath)
finally:
    browser.quit()


